Nombre / destinations - sortants


Nous n’avons pas de chiffres remontant aussi loin pour les Sciences de l’Éducation, mais entre 2018 et 2022, il y a eu 6 départs.





Nombre / destinations - entrants






Notes - sortants

# Read the outgoing-student list, keep the rows whose home discipline is
# psychology, and expose the full name under the short column name `nom`.
dat_num <- readxl::read_excel("sortants/liste SPSE - étudiants sortants 2018-2023.xlsx") %>%
  mutate(disc = `Discipline (origine)`) %>%
  filter(disc == "Psychologie") %>%
  mutate(nom = `Individu: Nom complet`)

# Give the reference table (created elsewhere) the same short column names so
# that it can serve as a full-name -> student-number lookup.
dat_clean_sort$nom <- dat_clean_sort$`Individu: Nom complet`
dat_clean_sort$numero <- dat_clean_sort$`Individu: Numéro étudiant`

# Attach the student number to every outgoing student, matching on full name.
# NOTE(review): a name that appears several times in `dat_clean_sort` would
# duplicate rows here; confirm names are unique in that table.
student_ids <- dat_clean_sort[, c("numero", "nom")]
dat_num_ok <- dplyr::left_join(dat_num, student_ids, by = "nom")


# Semester(s) spent abroad:
#   "s1_s2" when the start and end periods differ,
#   "s1"    when the (single) period label contains "1er",
#   "s2"    otherwise.
dat_num_ok$sem_depart <- with(
  dat_num_ok,
  ifelse(
    `Période de début` != `Période de fin`,
    "s1_s2",
    ifelse(grepl("1er", `Période de fin`, fixed = TRUE), "s1", "s2")
  )
)
# Unused alternative kept for reference: take the year from the period label.
# vec = strsplit(dat_num_ok$`Période de début`, " +")
# annee = do.call(rbind, lapply(vec, function(x) x[3]))

# Academic year of the mobility; rows for 2022/23 are dropped.
dat_num_ok <- dat_num_ok %>%
  mutate(annee_2 = `Année académique`) %>%
  filter(annee_2 != "2022/23")

# Collapse one sheet of grades to a single row per student.
#
# Args:
#   x: data frame with the columns `numero`, `nom`, `prenom`, `n1` and `n2`.
#
# Returns:
#   A tibble grouped by `numero` with one row per student and the columns
#   `numero`, `nom`, `prenom`, `note1` (best `n1`) and `note2` (best `n2`).
#   `note1` / `note2` are numeric; they are NA when the student has no usable
#   grade in that column.
agg <- function(x) {
  # Highest grade in `g`. Grades can arrive as text (the script coerces
  # note1/note2 with as.numeric() afterwards), and max() on text compares
  # lexicographically ("9" > "12"), so convert before comparing. Entries that
  # are not numbers become NA. An all-NA group returns NA instead of the -Inf
  # that max(..., na.rm = TRUE) would produce.
  best_grade <- function(g) {
    g <- suppressWarnings(as.numeric(as.character(g)))
    if (all(is.na(g))) NA_real_ else max(g, na.rm = TRUE)
  }

  # Last distinct value of `v`: the value the previous implementation ended up
  # with after keeping the last row of each group, but computed as a scalar so
  # summarise() never has to return several rows for one group.
  last_distinct <- function(v) {
    utils::tail(unique(v), 1)
  }

  x %>%
    group_by(numero) %>%
    summarise(
      nom = last_distinct(nom),
      prenom = last_distinct(prenom),
      note1 = best_grade(n1),
      note2 = best_grade(n2)
    ) %>%
    # Keep the result grouped by student, as callers received it before.
    group_by(numero)
}

# Read every sheet of the grades workbook into a list named after the sheets.
sheet_names <- readxl::excel_sheets("sortants/notes/notes_sortants_upn_hom.xlsx")
my_sheets <- setNames(
  lapply(
    sheet_names,
    function(sheet) {
      readxl::read_excel("sortants/notes/notes_sortants_upn_hom.xlsx", sheet = sheet)
    }
  ),
  sheet_names
)

# Replace each sheet by its one-row-per-student summary and tag every row
# with the name of the sheet it came from (column `YEAR`).
# `a` is the running position of the current sheet in `my_sheets`.
a <- 0
for (i in unique(names(my_sheets))) {
  a <- a + 1
  # Warnings raised while aggregating are deliberately silenced.
  sheet_summary <- suppressWarnings(agg(my_sheets[[i]]))
  sheet_summary$YEAR <- rep(names(my_sheets)[a], nrow(sheet_summary))
  my_sheets[[i]] <- sheet_summary
}

# Stack the per-sheet summaries into one table.
res_upn_tot <- do.call(rbind, my_sheets)

# Grades may be stored as text or factors: go through character to numeric.
# Entries that are not numbers become NA (hence the coercion warnings).
to_number <- function(g) as.numeric(as.character(g))
res_upn_tot$note1 <- to_number(res_upn_tot$note1)
## Warning: NAs introduced by coercion
res_upn_tot$note2 <- to_number(res_upn_tot$note2)
## Warning: NAs introduced by coercion

# Row-wise mean of the two grades, ignoring missing ones (NaN if both are NA).
grade_pairs <- cbind(res_upn_tot$note1, res_upn_tot$note2)
res_upn_tot$NOTE <- apply(grade_pairs, 1, mean, na.rm = TRUE)

# Keep what follows the last underscore of the sheet name.
res_upn_tot$annee_1 <- sub(".*_", "", res_upn_tot$YEAR)

# One row per (grade-sheet row, mobility row) pair of the same student.
res <- merge(x = res_upn_tot, y = dat_num_ok, by = "numero")

# Fill `res$note_mob`: for every student, the grade recorded for the academic
# year of the mobility, restricted to the semester(s) spent abroad.
#
#   sem_depart == "s1"  -> note1 of that year
#   sem_depart == "s2"  -> note2 of that year
#   anything else       -> NOTE  of that year (mean of note1 and note2)
#
# The grade row is located by comparing the first year of `annee_2`
# ("2018/19" -> "2018") with `annee_1`, so any academic year is handled by the
# same code path instead of one copy of the logic per year.
#
# `note_mob` stays NA when:
#   - the student has no grade row for the mobility year;
#   - the mobility year or semester is missing, or the student has several
#     distinct ones (ambiguous), in which case a warning is emitted;
#   - several different grades match the mobility year (also warned about).
res$note_mob <- NA_real_
# Students without a number cannot be matched reliably: skip NA ids.
for (id in unique(res$numero[!is.na(res$numero)])) {
  rows_i <- which(res$numero == id)
  x_i <- res[rows_i, , drop = FALSE]

  mob_year <- unique(x_i$annee_2)
  sem <- unique(as.character(x_i$sem_depart))
  if (length(mob_year) != 1L || length(sem) != 1L || anyNA(c(mob_year, sem))) {
    warning(
      "note_mob left NA for student ", id,
      ": mobility year or semester is missing or ambiguous",
      call. = FALSE
    )
    next
  }

  # Grade rows are keyed by the first calendar year of the academic year.
  start_year <- sub("/.*", "", mob_year)
  in_year <- !is.na(x_i$annee_1) & x_i$annee_1 == start_year
  if (!any(in_year)) {
    next
  }

  note_col <- switch(sem, s1 = "note1", s2 = "note2", "NOTE")
  # Duplicated rows for the same year collapse to a single value here.
  note <- unique(x_i[[note_col]][in_year])
  if (length(note) != 1L) {
    warning(
      "note_mob left NA for student ", id,
      ": several different grades found for ", mob_year,
      call. = FALSE
    )
    next
  }

  res[rows_i, "note_mob"] <- note
}

# Step 1 - one row per student and destination country:
#   note_upn = mean of NOTE over the student's rows,
#   note_mob = the grade attached to the mobility period.
# Rows without a mobility grade, with an undefined NOTE, or with a mobility
# grade equal to 0 are excluded first.
per_student <- res %>%
  filter(!is.na(note_mob), !is.nan(NOTE), note_mob != 0) %>%
  group_by(numero, Pays) %>%
  summarise(
    note_upn = mean(NOTE),
    note_mob = unique(note_mob)
  )
## `summarise()` has grouped output by 'numero'. You can override using the
## `.groups` argument.

# Step 2 - per country: mean of (mobility grade - overall grade) and the
# number of students it is based on.
res_ok <- per_student %>%
  mutate(diff_notes = note_mob - note_upn) %>%
  group_by(Pays) %>%
  summarise(
    country_diff = mean(diff_notes),
    n_student = n()
  )
# Reference table downloaded from the plotly datasets repository; later code
# uses its COUNTRY and CODE columns.
df <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv")

# French -> English country names. Names that are not listed are left as is.
pays_en <- c(
  "Argentine" = "Argentina",
  "Belgique" = "Belgium",
  "Canada" = "Canada",
  "Chili" = "Chile",
  "Espagne" = "Spain",
  "Finlande" = "Finland",
  "Grèce" = "Greece",
  "Hongrie" = "Hungary",
  "Italie" = "Italy",
  "Norvège" = "Norway",
  "Pays-Bas" = "Netherlands",
  "Pologne" = "Poland"
)
has_translation <- res_ok$Pays %in% names(pays_en)
res_ok$Pays[has_translation] <-
  unname(pays_en[as.character(res_ok$Pays[has_translation])])

# Same values under the column name used by the reference table.
res_ok$COUNTRY <- res_ok$Pays

# Choropleth of the per-country grade difference, built straight from
# `res_ok`. `locations` holds English country names, so `locationmode` must be
# "country names"; plotly's default expects ISO-3 codes and would match none
# of them.
# NOTE(review): `fig` is reassigned further down before being printed, so this
# figure is never displayed as the script stands.
fig <- plot_ly(
  res_ok,
  type = "choropleth",
  locations = res_ok$Pays,
  locationmode = "country names",
  z = res_ok$country_diff,
  text = res_ok$Pays,
  colorscale = "Blues"
)


# Add the per-country results to the reference table. No `by` is given, so
# the join uses the columns the two tables have in common.
df2 <- left_join(df, res_ok)
## Joining, by = "COUNTRY"
df2$country_diff <- round(df2$country_diff, digits = 3)

# Earlier palette, kept for reference:
# df2$color = ifelse(is.na(df2$country_diff), "#D9D9D9",
#  ifelse(df2$country_diff < -2, "#FE0000",
#   ifelse(df2$country_diff < -1, "#FF7B7B",
#   ifelse(df2$country_diff < 1, "#C6F08B", "#20DB00"))))

# Colour class per country. Later assignments override earlier ones, so the
# effective priority is: missing, then < -2, then < -1, then < 1, then rest.
diff_value <- df2$country_diff
shade <- rep("#20DB00", nrow(df2))
shade[diff_value < 1] <- "blue"
shade[diff_value < -1] <- "black"
shade[diff_value < -2] <- "#000000"
shade[is.na(diff_value)] <- "red"
df2$color <- shade

# World map keyed by the CODE column of the reference table.
# NOTE(review): 'coloraxis' does not look like a named plotly colour scale and
# `df2$color` is not passed to the plot; confirm the intended colouring.
fig <- plot_ly(
  df2,
  type = 'choropleth',
  locations = df2$CODE,
  z = df2$country_diff,
  text = df2$COUNTRY,
  colorscale = 'coloraxis'
)
fig
## Warning: Ignoring 210 observations